import sys
from BasicLibraries import *
from scipy import spatial
import statsmodels.api as sm
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import KFold
from sklearn.metrics         import make_scorer

from joblib import Parallel, delayed
import pygad
import traceback
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 
warnings.filterwarnings("ignore", category=FutureWarning) 

global_path = ''
ONEDRIVE = ''

np.random.seed(5)

#%%
class AllocationGAD():

    def __init__(self):
        pass
    def generate_initial_population(self, X, bdims,  initial_population_size = 50):

        
        a = X.transpose().iloc[:bdims].values.ravel().tolist().copy()
        ip = []
        for i in range(initial_population_size):
            b = a.copy()
            for n in range(10):
                s = np.random.randint(bdims)
                b[s] = [0 if b[s]==1 else 1][0]
            ip.append(b)
        return ip

    def find_closest_allocation(self, OBS, LST):
        dist = []
        for i in range(len(LST)):
            tmp = spatial.distance.cosine(OBS,  LST.iloc[i]) #(OBS - LST.iloc[i]).abs().sum()
            dist.append(tmp)
        return dist
    def genetic_step(self, bdims, firm, observed_performance, identifier, control_vars, sdgs_number,\
                     num_of_controls, FRM, value_of_controls, sdgs, \
                     max_ini_number, modl,performance_percentile, model_type, parent_selection_option,crossover_option,mutation_types_option):

        #=== The seed should be here!!
        #================
        np.random.seed(5)
        #================
        try:
            print('########### ', identifier, ' with pyGAD ############')
            global optima_list
            optima_list = []
            global optima_values
            optima_values = []
            global perf_perc
            perf_perc = performance_percentile
            global maxInitiatives
            maxInitiatives = max_ini_number
            global estimation_model
            estimation_model = modl
            global finance, firm_attr
            finance=FRM.transpose().iloc[sdgs_number:].values.ravel().tolist()
            firm_attr = firm.index.values.tolist()
            #== Expected performance
            E_perf = modl.predict(np.array(firm).reshape(1,-1))[0]
            #=== Observed allocation
            observed_allocation = FRM.transpose().iloc[:-num_of_controls].values.ravel().tolist()
            if np.sum(observed_allocation) == 0:
                #== If the firms has not done anything, just ignore the observation
                return(0)
    
            ## While searching for the optimum, keep track of all the other
            def gad_opt(ga_instance, xftnx, solution_idx):
                ### Choose the budget constraint as a multiple that the observe max number of initiatives
                if np.sum(xftnx[:sdgs_number]) == 0 or np.sum(xftnx[:sdgs_number]) > 1.5*maxInitiatives:
                    return(-100)
                else:
                    L = np.array(list(xftnx[:sdgs_number])+finance)
                    if model_type == 'linear':
                        ftn = estimation_model.predict(L)
                    else:
                        ftn = estimation_model.predict(list(np.array(L).reshape(1,-1)))[0]
                    #== The optimisation has meaning only if the optimum is greater than the expected performance
                    if ftn > E_perf:
                        optima_list.append(L[:sdgs_number])
                        optima_values.append(ftn)
                        return(ftn)
                    else:
                        return(ftn)

            #== Fixed parameters
            fitness_function = gad_opt
            init_range_low = 0
            init_range_high = 2           
            num_generations = 100
            sol_per_pop = 10
            num_genes = sdgs_number
            keep_parents = 1
            num_parents_mating  = 5
            mutation_percent_genes=10
            
         
            #=== Variable for robustness check
            parent_selection_type=str(parent_selection_option)
            #===
            for crossover_type in crossover_option:
                for mutation_type_ in mutation_types_option:
                    initial_pop = self.generate_initial_population(FRM, bdims,  100)
                    ga_instance = pygad.GA(num_generations=num_generations,
                                           num_parents_mating=num_parents_mating,
                                           fitness_func=fitness_function,
                                           sol_per_pop=sol_per_pop,
                                           num_genes=num_genes,
                                           init_range_low=init_range_low,
                                           init_range_high=init_range_high,
                                           parent_selection_type=parent_selection_type,
                                           keep_parents=keep_parents,
                                           crossover_type=crossover_type,
                                           mutation_type=mutation_type_,
                                           mutation_percent_genes=mutation_percent_genes,
                                           gene_type=int,
                                           initial_population=tuple(initial_pop))
                    ga_instance.run()

            #=== This is the best fitness of all
            best_of_all = ga_instance.best_solution()[0]
            true_global_optimum = ga_instance.best_solution()[1]
            #===

            observed_allocation = FRM.transpose().iloc[:-num_of_controls].values.ravel().tolist()
            optima_list = pd.DataFrame(optima_list)
            optima_values = pd.DataFrame(optima_values)
            
            u = optima_values.sort_values(by = optima_values.columns[0]).copy()
            global_optimum = u.iloc[-1].iloc[0]
            global_allocation = list(optima_list.loc[u.index[-1]])
    
    
            
            #== Filter for those above the plane
            h = optima_values/global_optimum
            h = h[h[0] > performance_percentile ]
            optima_list = optima_list.loc[h.index]
            optima_values = optima_values.loc[h.index]        
            #=== Find the closest allocation to the observed allocation [including the global in the search]
            closest_allocation = self.find_closest_allocation(observed_allocation, optima_list)
            idx = np.where(closest_allocation == min(closest_allocation))[0]
            #=== Now, among all the possible closest allocations, find the one that is closest to the global ... 
            optima_list_reduced = optima_list.iloc[idx]
            #== .. but only if there is more tha one optimal list reduced
            if len(optima_list_reduced) > 0:
                closest_allocation = self.find_closest_allocation(global_allocation, optima_list_reduced)
                idx = np.where(closest_allocation == min(closest_allocation))[0]
                optima_list_reduced = optima_list_reduced.iloc[idx]
                #=== Of those, choose the most parsimonious one
                minimal_list = optima_list_reduced.sum(axis = 1)
                if len(minimal_list) > 0:
                    idx = np.where(minimal_list == min(minimal_list))[0]
                    optima_list_reduced = optima_list_reduced.iloc[idx]
                    #=== Remove duplicates
                    optima_list_reduced = optima_list_reduced.drop_duplicates()
                    #=== Take the one with the maximum optima
                    optima_values_reduced = optima_values.loc[optima_list_reduced.index]
                    idx = np.where(optima_values_reduced == optima_values_reduced.max())[0][0]
                    idx = optima_values_reduced.index[idx]
                    optima_list_reduced = optima_list_reduced.loc[idx]
                    #=== local optimum
                    local_optimum = optima_values.loc[idx].iloc[0]
        
                else:
                    optima_list_reduced = pd.DataFrame(global_allocation, index = sdgs).transpose()
                    optima_values_reduced = global_optimum
                    local_optimum = global_optimum
            else:
                local_optimum = optima_values.loc[optima_list_reduced.index].iloc[0].iloc[0]
    
    
            #=== Recast the data
            observed_allocation = FRM.transpose().iloc[:-num_of_controls]     
            optima_list_reduced = optima_list_reduced.transpose()
            optima_list_reduced.index = observed_allocation.index
    
            #=== Now get some statistics
            #== If 1 -> Vectors are ortoghonal
            closest_allocation_distance = spatial.distance.cosine(optima_list_reduced.values.ravel(), observed_allocation.values.ravel())
            closest_allocation_distance_HUM = abs((observed_allocation.values.ravel() - optima_list_reduced.values.ravel())).sum()
            closest_allocation = optima_list_reduced
            closest_allocation_performance = local_optimum
            #= Estimate the number of overlapping allocations
            opa  = closest_allocation.replace(0, np.nan).values.ravel()
            oba  = observed_allocation.replace(0, np.nan).values.ravel()
            alignment = opa-oba
            alignment = len(pd.DataFrame(alignment).dropna())
            ##  Total number of initiatives
            total_number_obs_initiatives = observed_allocation.sum().values[0]    
        
            #=== Distance from best solution of all 
            best_of_all = pd.DataFrame(best_of_all, index = observed_allocation.index )
            best_allocation_distance = spatial.distance.cosine(best_of_all.values.ravel(), observed_allocation.values.ravel())
            best_allocation_distance_HUM = abs((best_of_all.values.ravel() - observed_allocation.values.ravel())).sum()
        
            ## Firm description -- the score should be a function of these characteristics
            firm_summary = dict()
            summary_info = pd.DataFrame([closest_allocation_distance, closest_allocation_distance_HUM, alignment, 
                                         observed_performance,  
                                         E_perf, 
                                         closest_allocation_performance, \
                                         best_allocation_distance,  best_allocation_distance_HUM, total_number_obs_initiatives], \
                                         index = ['Distance_from_closest_optimum', 
                                                 'Humming_distance_from_closest_optimum',
                                                 'Matching_initiatives',
                                                 'Performance',
                                                 'ExpectedPerformance',
                                                 'OptimalPerformance',
                                                 'Distance_from_best_optimum',
                                                 'Humming_distance_from_best_optimum',
                                                 'Total_number_of_initiatives'], columns = [identifier]).transpose()
        
            firm_summary[str(identifier)] = {'Score information': summary_info, 
                                             'Optimal allocation': closest_allocation, 
                                             'Observed allocation': observed_allocation,
                                             'Best of all allocations': best_of_all}
            resultsDICTIONARY_tmp = dict()
            resultsDICTIONARY_tmp['local_optimum'] = local_optimum
            resultsDICTIONARY_tmp['global_optimum'] = global_optimum
            resultsDICTIONARY_tmp['true_global_optimum']    =  true_global_optimum
            resultsDICTIONARY_tmp['observed_performance'] = observed_performance
            resultsDICTIONARY_tmp['expected_performance'] = E_perf
            resultsDICTIONARY_tmp['closest_allocation'] = closest_allocation
            resultsDICTIONARY_tmp['observed_allocation'] = observed_allocation
            resultsDICTIONARY_tmp['best_allocation'] = best_of_all
            resultsDICTIONARY_tmp['closest_allocation_distance'] = closest_allocation_distance
            resultsDICTIONARY_tmp['closest_allocation_distance_humming'] = closest_allocation_distance_HUM
            resultsDICTIONARY_tmp['best_allocation_distance'] = best_allocation_distance
            resultsDICTIONARY_tmp['best_allocation_distance_humming'] = best_allocation_distance_HUM
            resultsDICTIONARY_tmp['total_number_obs_initiatives'] = total_number_obs_initiatives
            resultsDICTIONARY_tmp['firm_summary'] = firm_summary


            return(resultsDICTIONARY_tmp)

        except Exception:
            print('Exception occurred at', identifier) 
            print(traceback.format_exc())
            return(0)
    
    
    def firm_wise_optimization(self, iteration_, x, y, modl, max_ini_number, dimension_, \
                               control_vars, names, nSDGs, sdgs,  model_type, \
                                   parent_selection_option,crossover_option,mutation_types_option,control_boundaries = None, performance_percentile = 0.99):
        '''
        firm:                   the row of the firm with initiatives, characteristics
        control_vars:           the control variables names
        observed_performance:   the empirical performance of the firm
        identifier:             either gvkey of company name
        dimension_:             the initiative names
        control_vars:           the control variables, i.e.,  control_vars = controls
        names:                  the name of the initiatives
        get_optimum:            if True, then run the optimization process, otherwise, just search over the possibilities
        The function returns the best allocation strategy for the firm
        '''
        
        firm = x.iloc[iteration_]
        observed_performance = y.iloc[iteration_]
        identifier = x.index[iteration_]
        num_of_controls = len(control_vars)
        FRM = pd.DataFrame(firm).transpose()
        value_of_controls = FRM[FRM.columns[-num_of_controls:]].values.tolist()[0]

        bdims = nSDGs 

        resultsDICTIONARY_tmp = \
            self.genetic_step(bdims, firm, observed_performance, identifier, control_vars, nSDGs,\
                              num_of_controls, FRM, value_of_controls, sdgs, \
                                  max_ini_number, modl,performance_percentile, model_type,parent_selection_option,crossover_option,mutation_types_option)

        
        resultsDICTIONARY = resultsDICTIONARY_tmp
        
        return(resultsDICTIONARY)
        
    def get_corr(self, y, x, m):
        l = pd.DataFrame(y)
        l['a'] = m.predict(np.array(x))
        l.columns = ['Observed', 'Estimated']
        l = l[(l>l.quantile(0.02)) & (l<l.quantile(0.98))].dropna()
        cor = pearsonr(l['Observed'], l['Estimated'])[0]
        sig = pearsonr(l['Observed'], l['Estimated'])[1]
        return cor, sig
    #==== Feature importances
    def make_importance_matrix(self, a):
        sdg = a.reset_index()['index'].apply(lambda x: x.split(' - ')[1])
        ini = a.reset_index()['index'].apply(lambda x: x.split(' - ')[0])
        a.insert(0, 'sdg', sdg.values.tolist())
        a.insert(1, 'ini', ini.values.tolist())
        a.columns = ['sdg', 'ini', 2]
        fi = a.pivot('sdg', 'ini', 2).loc[a.pivot('sdg', 'ini',2).index[::-1]]
        fi.loc[:, 'Total'] = fi.sum(axis = 1)
        fi.loc['Total'] = fi.sum(axis = 0)
        fi = fi.sort_values(by = 'Total', axis = 0, ascending = False)
        fi = fi.sort_values(by = 'Total', axis = 1, ascending = False)
        fi = fi.drop(columns = ['Total'])
        fi = fi.drop(index = ['Total'])
        return fi

    def behavioural_importance(self, forest, col_names, bdims, tdims):
        a = pd.DataFrame(forest.feature_importances_[:bdims], index = list(col_names[:bdims]))
        fi = self.make_importance_matrix(a)
        return fi
    def financial_importance(self, forest, col_names, bdims, tdims):
        a = pd.DataFrame(forest.feature_importances_[bdims:tdims], index = list(col_names[bdims:tdims]))
        return a
    def regional_importance(self, forest, col_names, bdims, tdims):
        a = pd.DataFrame(forest.feature_importances_[tdims:], index = list(col_names[tdims:]))
        return a


    #==== Get the fitted model
    def fit_model(self, dat,  sdgs_number= 0,\
                     dimension_ = '', gvkey_list = None, model_type = '', 
                     normal_fit=True, 
                     energy_price_control=False, ## It should always be False except for robustness
                     binarization_quantile=0.75,
                     FW=1):



        #========
        sdgs_local = [[d+' - SDG '+str(s) for s in sdgs_number] for d in dimension_]
        sdgs_local = [item for sublist in sdgs_local for item in sublist]

        ### Get the target function
        dat = dat.loc[dat[['next_year_financial_performance', 'next_year_non_financial_performance', 'next_year_performance']].dropna().index]
        dat.loc[:, 'target_function'] = dat['next_year_performance']
        ################################################################################################  
    
        ####### Main   
        dat = dat.loc[dat.target_function.dropna().index]
        dat = dat[dat.Total_invested_capital_BS1>0]
        dat = dat[dat.sale_usd>0]
        dat = dat[dat.ppent_usd>0]
        dat = dat[dat.at_usd>0]

        dat['inv'] = dat.Total_invested_capital_BS1/dat.sale_usd #
        dat = dat[dat.inv < dat.inv.quantile(0.99)]
        #== Assets characteristics
        dat['TangibleAssets'] = dat['ppent_usd']/dat.at_usd
        #=== Shares buyback rate
        dat['buyback_rate'] = (dat['iss_eq_usd_ref']/(dat.sale_usd*1000000))
        dat['dividends'] = dat['dps_usd_ref']

        #== Controls
        controls = ['inv', 'firm_size', 'TangibleAssets', 'dividends',  'buyback_rate', 'MarketLeverage']

        if FW<1:
            print('Adding emission source controls')
            controls+=['Estimated', 'Reported']


        #== 08/24 revision
        if energy_price_control:
            print('Adding a control for energy price index')
            controls+=['EnergyPriceIndex']

        ################################################################################################  
        controls_num = len(controls)
        sub_dat = dat[sdgs_local + controls + ['target_function', 'conml', 'gvkey', 'rfyear', 'MacroRegion']].dropna()
        sub_dat = sub_dat.sort_values(by = ['gvkey', 'rfyear'])
        sub_dat = sub_dat[sub_dat.target_function.abs() < sub_dat.target_function.abs().quantile(0.99)]        
        reduced_gvkey_list = sub_dat[sub_dat.gvkey.isin(gvkey_list)].gvkey.unique()
        
        
        #== Get dummies   
        geo = pd.get_dummies(sub_dat['MacroRegion'], drop_first=True)
        #== 08/24 revision
        if energy_price_control:
            ## If you include the energy price index do not include year fixed effects
            print('Adding a control for energy price index')
            dums = geo
        else:
            yea = pd.get_dummies(sub_dat['rfyear'], drop_first=True)
            yea.columns = [str(int(n)) for n in yea.columns]
            dums = pd.concat((geo, yea), axis = 1)

        sub_dat = pd.concat((sub_dat, dums), axis = 1)
        
        ## Isolate dependent and independent variables
        x = sub_dat.set_index('gvkey').loc[:, sdgs_local + controls + list(dums.columns)]
        y = sub_dat.set_index('gvkey').loc[:, 'target_function']       

        #===== AFTER REVIEW: test also the validity of the results under different thresholds
        quantiles = x[sdgs_local].quantile(binarization_quantile)
        x[sdgs_local] = x[sdgs_local][(x[sdgs_local] >= quantiles) & (x[sdgs_local] > 0)].replace(np.nan, 0).clip(0,1).round()
        names = pd.Series(x.drop(columns = list(dums.columns)).columns[:-controls_num]).apply(lambda x: x.split(' - ')[1])



        # Quick clean-up 
        #== Update the controls with the countries dummies
        controls = controls + list(dums.columns)

        #=== Changed after first draft
        x = x.astype(float)
        
        
        #===
        ################################################################################################  
        ## Run the optimization depending on the optimization function
        ## Note, you return -pred because you could use this into the optimization function
        ## However -- when you explore the space -- you need another - in front of it
        if model_type == 'linear':
            print('Estimate the fitness function with OLS ...')     
            m  =  sm.OLS(y, x).fit()          
            sdgs_reduced = sdgs_local
            relevant_features, irrelevant_features = m.resid, m
            
        elif model_type == 'RandomForest':
            #=== Select the features
            fi = pd.DataFrame()
            for _ in range(10):                        
                sam = np.random.choice(x.index.unique(), int(len(x.index.unique())*0.75))
                x_sam, y_sam = x.loc[sam].copy(), y.loc[sam].copy()
                selector = SelectFromModel(estimator= RandomForestRegressor(), threshold = '0.25*median').fit(x_sam, y_sam)
                fiT = selector.get_support()
                fi = pd.concat((fi, pd.DataFrame(fiT, index = x.columns).replace([True, False], [1,0])), axis = 1)
            necessary_features = dums.columns
            features = fi.drop(index = dums.columns).mean(axis = 1).round()
            relevant_features = features[features > 0]
            irrelevant_features = features[features == 0]
            x = x[list(relevant_features.index)+list(necessary_features)]
            print('Parameter estimation with an explicit cross-validation step (it will take longer) ...')
           

            param_grid = {'n_estimators': [60,80, 100],
                      'max_depth': [64, 128, 256], 
                      'min_samples_leaf': [2, 4, 8],
                      'max_features': ['auto', 'sqrt']}  
            def custom_loss_function(y_pred, y_true):
                score_overall = pearsonr(y_pred, y_true)[0]
                return score_overall       
            custom_scorer = make_scorer(custom_loss_function, greater_is_better=True)

            rf_ = RandomForestRegressor(random_state=5)
            rf_cv = GridSearchCV(estimator = rf_, param_grid = param_grid, \
                                 cv = KFold(5, random_state=5, shuffle=True), n_jobs = -1, refit=True)
            rf_cv.fit(np.array(x), np.array(y))
            m = rf_cv.best_estimator_
            
            sdgs_reduced = list(set(sdgs_local) & set(list(relevant_features.index)))
        else:
            print('Wrong model')
            raise ('Warning error')
        print('... done')


        return x, y, m, sdgs_reduced, controls, names, reduced_gvkey_list, relevant_features, irrelevant_features

    
    def main_run(self, dat,  parent_selection_option,crossover_option,mutation_types_option, sdgs_number= 0,\
                     dimension_ = '', gvkey_list = None, \
                          model_type = 'linear', quantile_bound= 1, \
                             performance_percentile = 0.99, normal_fit = True, energy_control=False,FW=1):
        '''
        dat:            This is the dataset comprising the behavioral data + accounting + returns
        target:         What is the value to optimize? If 'joint' than is a combination of financial and non financial performance
        dimension:      Either SDG or initiative depending on the previous choice
        gvkey_list:     If you focus on a restricted list of firms provide a list of gvkey, otherwise is None
        model_type:     What is the optimization function (linear, kernel)
        '''
        
        #== Prepare the data and fit the model
        x, y, forst, sdgs_reduced, controls, names, reduced_gvkey_list, relevant_features, irrelevant_features = self.fit_model(dat, sdgs_number= sdgs_number, \
                     dimension_ = dimension_, gvkey_list = gvkey_list,  model_type = model_type, normal_fit = normal_fit, 
                     energy_price_control=energy_control,FW=FW)
       
        if gvkey_list != None:
            x = x.loc[reduced_gvkey_list]
            y = y.loc[reduced_gvkey_list]
        nSDGs = len(sdgs_reduced)
        #===== Constrained
        t = x[sdgs_reduced].sum(axis = 1)
        t = t[t > 0].dropna()
        max_ini_number = np.round(t.quantile(quantile_bound))
        #===== Get the companies that are there in the latest possible year
        latest_possible_year =  dat.rfyear.max()
        latest_possible_firm =  dat[dat.rfyear == latest_possible_year].gvkey.unique()
        #===== Now only retains those companies from x and y
        x = x[x.index.isin(latest_possible_firm)]
        y = y[y.index.isin(latest_possible_firm)]
        #===== Now get only observations for the last year
        x = x.groupby(x.index).last()
        y = y.groupby(y.index).last()
        #=== Parallel version
        resultsDICTIONARY = Parallel(n_jobs=6)(delayed(self.firm_wise_optimization)(i,  x, y,  forst, max_ini_number, \
                                                   dimension_, controls, names, nSDGs,  sdgs_reduced, model_type, \
                                                       parent_selection_option,crossover_option,mutation_types_option,
                                                       performance_percentile = performance_percentile) for i in range(len(x)))


        return(resultsDICTIONARY, relevant_features, irrelevant_features)


    #========================================================================
    #========================================================================
    
  
    def make_score(self, X, init_score):
        '''
        Still working on this. 
        We really should have more data to get a sense of what works and what not
        '''
        #===
        #return((X['ExpectedPerformance']/X['OptimalPerformance'])/(1+X['Distance_from_closest_optimum']))
        return(1./(100*(X['OptimalPerformance'].abs()-X['ExpectedPerformance'].abs()))*1/(1+X['Distance_from_closest_optimum']))


    def calculate_firm_statistics(self, X, i, dt, plot_ = False):
        scores_stats = pd.DataFrame()
        g_score_vec = []
        g = int(list(X[i]['firm_summary'].keys())[0])
        init_score = dt[dt.gvkey == g]['number_of_initiatives_score'].values[0]
        S = X[i]['firm_summary'][str(g)]['Score information'].transpose().copy()
        distance_from_closest_optimum = S.transpose()['Distance_from_closest_optimum'].iloc[0]
        scores_stats = pd.concat((scores_stats, S.transpose()))
        g_score_vec.append(self.make_score(S.transpose(), init_score).iloc[0])

        ### Optimum    
        a = pd.DataFrame(X[i]['firm_summary'][str(g)]['Optimal allocation'].copy())
        sdg = a.reset_index()['index'].apply(lambda x: x.split(' - ')[1])
        ini = a.reset_index()['index'].apply(lambda x: x.split(' - ')[0])
        a.insert(0, 'sdg', sdg.values.tolist())
        a.insert(1, 'ini', ini.values.tolist())
        a.columns = ['sdg', 'ini', 0]
        opt = a.pivot('sdg', 'ini', 0).loc[a.pivot('sdg', 'ini', 0).index[::-1]]
        ### Observed 
        a = pd.DataFrame(X[i]['firm_summary'][str(g)]['Observed allocation'].copy())
        sdg = a.reset_index()['index'].apply(lambda x: x.split(' - ')[1])
        ini = a.reset_index()['index'].apply(lambda x: x.split(' - ')[0])
        a.insert(0, 'sdg', sdg.values.tolist())
        a.insert(1, 'ini', ini.values.tolist())
        a.columns = ['sdg', 'ini', 2]
        obs = a.pivot('sdg', 'ini', 2).loc[a.pivot('sdg', 'ini',2).index[::-1]]

        difference_matrix = obs - opt
    
        g_score = np.mean(g_score_vec)
        return(g, obs, opt, difference_matrix, scores_stats, g_score_vec, g_score, distance_from_closest_optimum )

    def get_performances(self, X, i):
        try:
            g = int(list(X[i]['firm_summary'].keys())[0])
            S = X[i]['firm_summary'][str(g)]['Score information'].transpose().copy()
            return S.loc[['Performance', 'ExpectedPerformance', 'OptimalPerformance']].transpose().reset_index()
        except Exception:
            return 0


